import pickle

import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors

def select_hot_points(coord, shapley, fraction=0.4):
    """Return the coordinates of the points with the largest Shapley values.

    Args:
        coord: Array of point coordinates, indexed along its first axis.
        shapley: Array of Shapley values aligned with ``coord``
            (assumed to hold one value per point -- TODO confirm against
            whatever writes ``encoded_coord_temp.pkl``).
        fraction: Share of the points to keep, counted from the largest
            Shapley value downwards. The count is truncated to an integer.

    Returns:
        The entries of ``coord`` for the top ``fraction`` of points, ordered
        by descending Shapley value.
    """
    # Negating before argsort yields the indexes in descending order.
    order = np.argsort(-shapley)
    split_point = int(len(order) * fraction)
    return coord[order[:split_point]]


def kth_neighbor_distances(points, k):
    """Return, for every point, the distance to its k-th nearest neighbour.

    Args:
        points: Array of points to measure.
        k: Rank of the neighbour whose distance is reported.

    Returns:
        One distance per entry of ``points``.

    Raises:
        ValueError: If ``points`` holds fewer than ``k + 1`` entries, since
            ``k + 1`` neighbours cannot be found among them.
    """
    n_points = len(points)
    if n_points < k + 1:
        raise ValueError(
            f'need at least {k + 1} points to compute the {k}-th neighbour '
            f'distance, got {n_points}'
        )

    # k + 1 neighbours are requested because the query set is the fitted set
    # itself, so each point normally comes back as its own nearest neighbour
    # at distance 0.
    nbrs = NearestNeighbors(n_neighbors=k + 1).fit(points)
    distances, _ = nbrs.kneighbors(points)
    # Neighbours are returned by increasing distance, so the last column is
    # the farthest of the k + 1, i.e. the k-th neighbour proper.
    return distances[:, -1]


def cluster_hot_points(points, k=50, percentile=75):
    """Cluster ``points`` with DBSCAN using a k-distance based ``eps``.

    ``eps`` is the given percentile of the k-th neighbour distances and
    ``min_samples`` is ``2 * k``.

    Args:
        points: Array of points to cluster.
        k: Neighbour rank used for the k-distance estimate.
        percentile: Percentile of the k-distances used as ``eps``.

    Returns:
        A ``(dbscan, labels)`` tuple: the fitted DBSCAN estimator and the
        cluster label assigned to each point.

    Raises:
        ValueError: If ``points`` holds fewer than ``k + 1`` entries.
    """
    eps = np.percentile(kth_neighbor_distances(points, k), percentile)
    dbscan = DBSCAN(eps=eps, min_samples=k * 2)
    labels = dbscan.fit_predict(points)
    return dbscan, labels


def main():
    """Cluster the top-Shapley points from the input pickle and save the result."""
    # NOTE: pickle.load can execute arbitrary code while deserialising; only
    # run this against a file produced by a trusted source.
    with open('encoded_coord_temp.pkl', 'rb') as f:
        encoded_coord_temp = pickle.load(f)

    # The pickle holds the coordinates first and the Shapley values second.
    coord, shapley = encoded_coord_temp[0], encoded_coord_temp[1]

    # Keep only the "hot" points: the top 40% by Shapley value.
    heat_points = select_hot_points(coord, shapley)

    dbscan, labels = cluster_hot_points(heat_points)

    # Save the fitted DBSCAN model, the hot points and their labels together.
    with open('dbscan_model_label.pkl', 'wb') as f:
        pickle.dump((dbscan, heat_points, labels), f)


if __name__ == '__main__':
    main()
